In [1]:
    
# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd
    
Next, locate and read the sample input tables.
In [2]:
    
# Locate the 'datasets' directory under the py_entitymatching install path.
# os.path.join inserts the platform separator and avoids a doubled separator
# if the install path already ends with one.
datasets_dir = os.path.join(em.get_install_path(), 'datasets')
# Build the path of the sample input table
path = os.path.join(datasets_dir, 'dblp_demo.csv')
    
In [3]:
    
# Read the same CSV file into two tables, setting 'id' as the key attribute
# of each. A is explored with OpenRefine and B with pandastable in later cells.
A = em.read_csv_metadata(path, key='id')
B = em.read_csv_metadata(path, key='id')
# Preview the first rows of A
A.head()
    
    
    Out[3]:
In [4]:
    
# Invoke the OpenRefine GUI for data exploration of table A.
# The returned project handle `p` is kept so the explored data can be
# exported back to a dataframe afterwards.
# NOTE(review): presumably requires a reachable OpenRefine server — confirm.
p = em.data_explore_openrefine(A, name='Table')
    
In [5]:
    
# Save the OpenRefine project back to our dataframe; A is rebound to the
# exported frame, replacing the table read from the CSV.
# After calling export_pandas_frame, the OpenRefine project is deleted
# automatically, so this cell presumably cannot be re-run without first
# re-creating the project — TODO confirm.
A = p.export_pandas_frame()
    
In [6]:
    
# Preview the first rows of A as exported from the OpenRefine project
A.head()
    
    Out[6]:
In [7]:
    
# Invoke the pandastable GUI for data exploration of table B.
# This call blocks the notebook until the GUI window is closed.
# The return value is not used here.
em.data_explore_pandastable(B)
    
In [8]:
    
# Preview the first rows of B after the pandastable session
# NOTE(review): presumably reflects any edits made in the GUI — confirm.
B.head()
    
    Out[8]:
In [ ]: